remove(list = ls())

library(tidyverse)
library(randomForest)
library(lme4)
library(stargazer)
library(caret)
library(ggridges)
library(pdp)
library(DiagrammeR)

# Loading in data 
performance_data <- read.csv(path)


# Cleaning data
complete_set <- complete_set %>%
  dplyr::select(-X)

##########################
#### Making functions ####
##########################

# Random forest function
do_forest <- function(df) {
  forest <- randomForest(as.factor(number_rating) ~ ., data = df, 
                         ntree = 10000, mtry = round(sqrt(ncol(df) - 1)), sampsize = 1000, importance = TRUE)
  return(forest)
}

# Visualize forest function
visualize_forest <- function(forest_object) {
  model_vis <- as.data.frame(importance(forest_object))
  
  model_vis <- cbind(rownames(model_vis), model_vis)
  
  colnames(model_vis) <- c("variable", "1", "2", "3", "4", "accuracy", "gini")
  
  model_vis$name <- factor(model_vis$variable, levels = model_vis$variable[order(model_vis$accuracy)])
  
  
  model_vis %>%
    arrange(desc(accuracy)) %>%
    slice(1:10) %>%
    ggplot(aes(x = as.factor(name), y = accuracy))+
    geom_point(size = 3, alpha = .3)+
    geom_segment(aes(x=variable, 
                     xend=variable, 
                     y=0, 
                     yend=max(accuracy)), 
                 linetype="dashed", 
                 size=0.1)+
    theme_bw()+
    labs(title="Dot plot", 
         subtitle="Variables sorted by mean decrease in accuracy",
         x = "",
         y = "") +  
    coord_flip()
}

# Confusion matrix function
do_confusion <- function(model, train_set, valid_set) {
  pred_train <- predict(model, train_set, type = "class")
  
  table(pred_train, train_set$number_rating)
  
  pred_valid <- predict(model, valid_set, type = "class")
  
  confusion <- table(pred_valid, valid_set$number_rating)
  
  confuse_matrix <- confusionMatrix(confusion)
  return(confuse_matrix)
}


# Interaction between housing median price and the relation between parental satisfaction and ratings
library(MASS)
library(stargazer)

mod_price <- polr(as.factor(number_rating) ~ satisfaction + house_price_2017_std +
                    town_avg_std + 
                    pct_special_needs + free_meals_pct +
                    n_teachers + pup_tea_ratio + 
                    as.factor(year.x) + as.factor(town), data = performance_data,
                  Hess = TRUE)

mod_price_interact <- polr(as.factor(number_rating) ~ satisfaction*house_price_2017_std +
                             town_avg_std + 
                             pct_special_needs + free_meals_pct +
                             n_teachers + pup_tea_ratio + 
                             as.factor(year.x)+ as.factor(town), data = performance_data,
                           Hess = TRUE)

mod_unemploy <-  polr(as.factor(number_rating) ~ satisfaction*unemployment_rate +
                        town_avg_std + 
                        pct_special_needs + free_meals_pct +
                        n_teachers + pup_tea_ratio + 
                        as.factor(year.x)+ as.factor(town), data = performance_data,
                      Hess = TRUE)

mod_poverty <-  polr(as.factor(number_rating) ~ satisfaction*relative_poverty+
                       town_avg_std + 
                       pct_special_needs + free_meals_pct +
                       n_teachers + pup_tea_ratio + 
                       as.factor(year.x)+ as.factor(town), data = performance_data,
                     Hess = TRUE)

mod_turnout <-  polr(as.factor(number_rating) ~ satisfaction*turnout_2017+
                       town_avg_std + 
                       pct_special_needs + free_meals_pct +
                       n_teachers + pup_tea_ratio + 
                       as.factor(year.x)+ as.factor(town), data = performance_data,
                     Hess = TRUE)

stargazer(mod_price, mod_price_interact, mod_unemploy, mod_poverty, mod_turnout, 
          keep = c("satisfaction",
                   "satisfaction*house_price_2017_std",
                   "satisfaction*unemployment_rate",
                   "satisfaction*relative_poverty",
                   "satisfaction*turnout_2017"),
          type = "text")

# Visualizing the interaction effect
library(interplot)

interaction_plot <- function(mod, var1, var2, xlab, ylab) {
  interplot(mod, var1, var2)+
    geom_line(size = 1, color = "gray50")+
    xlab(xlab)+
    ylab(ylab)+
    theme_classic()+
    theme(text=element_text(size=8,  family="NewCenturySchoolbook"))+
    theme(line = element_line(size = 1))
}

summary(performance_data$house_price_2017_std)

p1 <- interaction_plot(mod_price_interact, var1 = "satisfaction", var2 = "house_price_2017_std",
                       xlab = "Median housing price 2017 (standardized)", ylab = "Marginal effect of Parental satisfaction")

p2 <- interaction_plot(mod_unemploy, var1 = "satisfaction", var2 = "unemployment_rate",
                       xlab = "Unemployment rate",
                       ylab = "Marginal effect of Parental satisfaction")

p3 <- interaction_plot(mod_poverty, var1 = "satisfaction", var2 = "relative_poverty",
                       xlab = "Child poverty rate",
                       ylab = "Marginal effect of Parental satisfaction")

p4 <- interaction_plot(mod_turnout, var1 = "satisfaction", var2 = "turnout_2017",
                       xlab = "Turnout % in the 2017 election",
                       ylab = "Marginal effect of Parental satisfaction")


(p1 | p4) / (p3 | p2)